library(data.table)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(stringr)
library(cowplot)
library(tidyr)
theme_set(theme_classic())
release_name = params$release_name
release_name## [1] "2023_12_JAX_RNAseq_ExtraEmbryonic"
result_dir = file.path("results", release_name)
enrichment_output_dir = file.path(result_dir, "enrichment")
res = readRDS(file.path(enrichment_output_dir,
sprintf("%s_enrichment.rds", release_name)))
names(res)## [1] "ExM_day_6_nor_ISL1_CE_up" "ExM_day_6_nor_ISL1_CE_down"
## [3] "ExM_day_6_nor_ISL1_KO_up" "ExM_day_6_nor_ISL1_KO_down"
## [5] "ExM_day_6_nor_ISL1_PTC_up" "ExM_day_6_nor_ISL1_PTC_down"
## [7] "PrS_day_6_hyp_EPAS1_CE_up" "PrS_day_6_hyp_EPAS1_CE_down"
## [9] "PrS_day_6_hyp_EPAS1_KO_up" "PrS_day_6_hyp_EPAS1_KO_down"
## [11] "PrS_day_6_hyp_EPAS1_PTC_up" "PrS_day_6_hyp_EPAS1_PTC_down"
## [13] "PrS_day_6_nor_EPAS1_CE_up" "PrS_day_6_nor_EPAS1_CE_down"
## [15] "PrS_day_6_nor_EPAS1_PTC_up" "PrS_day_6_nor_FOSB_CE_up"
## [17] "PrS_day_6_nor_FOSB_KO_up" "PrS_day_6_nor_FOSB_PTC_up"
## [19] "PrS_day_6_nor_GCM1_CE_up" "PrS_day_6_nor_GCM1_CE_down"
## [21] "PrS_day_6_nor_GCM1_KO_up" "PrS_day_6_nor_GCM1_KO_down"
## [23] "PrS_day_6_nor_GCM1_PTC_up" "PrS_day_6_nor_GCM1_PTC_down"
## [25] "PrS_day_6_nor_GRHL1_CE_up" "PrS_day_6_nor_GRHL1_CE_down"
## [27] "PrS_day_6_nor_GRHL1_KO_up" "PrS_day_6_nor_GRHL1_KO_down"
## [29] "PrS_day_6_nor_GRHL1_PTC_up" "PrS_day_6_nor_GRHL1_PTC_down"
## [31] "PrS_day_6_nor_POU2F3_CE_up" "PrS_day_6_nor_POU2F3_CE_down"
## [33] "PrS_day_6_nor_POU2F3_KO_up" "PrS_day_6_nor_PPARG_CE_up"
## [35] "PrS_day_6_nor_PPARG_CE_down" "PrS_day_6_nor_PPARG_KO_up"
## [37] "PrS_day_6_nor_PPARG_KO_down" "PrS_day_6_nor_PPARG_PTC_up"
## [39] "PrS_day_6_nor_PPARG_PTC_down" "PrS_day_6_EPAS1_CE_hyp_vs_nor_up"
## [41] "PrS_day_6_EPAS1_CE_hyp_vs_nor_down" "PrS_day_6_EPAS1_KO_hyp_vs_nor_up"
## [43] "PrS_day_6_EPAS1_KO_hyp_vs_nor_down" "PrS_day_6_EPAS1_PTC_hyp_vs_nor_up"
## [45] "PrS_day_6_EPAS1_PTC_hyp_vs_nor_down" "PrS_day_6_WT_WT_hyp_vs_nor_up"
## [47] "PrS_day_6_WT_WT_hyp_vs_nor_down"
multi_strategy_datasets = c("2023_12_JAX_RNAseq_ExtraEmbryonic",
"2024_05_JAX_RNAseq2_ExtraEmbryonic",
"2024_06_JAX_RNAseq_Neuroectoderm",
"2024_09_JAX_RNAseq6_Diverse",
"2024_09_JAX_RNAseq7_Reversion")
if (release_name%in%multi_strategy_datasets){
to_plot = grep("PTC|reverted|WT_WT_hyp_vs_nor", names(res))
res = res[to_plot]
}
sapply(res, function(x){sapply(x,nrow)})## $ExM_day_6_nor_ISL1_PTC_up
## reactome go_bp
## 2 10
##
## $ExM_day_6_nor_ISL1_PTC_down
## reactome go_bp
## 8 10
##
## $PrS_day_6_hyp_EPAS1_PTC_up
## go_bp
## 10
##
## $PrS_day_6_hyp_EPAS1_PTC_down
## reactome go_bp
## 10 10
##
## $PrS_day_6_nor_EPAS1_PTC_up
## reactome
## 1
##
## $PrS_day_6_nor_FOSB_PTC_up
## reactome
## 1
##
## $PrS_day_6_nor_GCM1_PTC_up
## reactome go_bp
## 10 10
##
## $PrS_day_6_nor_GCM1_PTC_down
## reactome go_bp
## 10 10
##
## $PrS_day_6_nor_GRHL1_PTC_up
## reactome go_bp
## 10 10
##
## $PrS_day_6_nor_GRHL1_PTC_down
## reactome go_bp
## 6 10
##
## $PrS_day_6_nor_PPARG_PTC_up
## reactome go_bp
## 10 10
##
## $PrS_day_6_nor_PPARG_PTC_down
## reactome go_bp
## 8 10
##
## $PrS_day_6_EPAS1_PTC_hyp_vs_nor_up
## reactome go_bp
## 10 10
##
## $PrS_day_6_EPAS1_PTC_hyp_vs_nor_down
## reactome go_bp
## 10 10
##
## $PrS_day_6_WT_WT_hyp_vs_nor_up
## reactome go_bp
## 10 10
##
## $PrS_day_6_WT_WT_hyp_vs_nor_down
## reactome go_bp
## 10 10
df_list = list()
for(label in names(res)){
res1 = res[[label]]
names(res1)[which(names(res1)=="go_bp")] = "GO biological process"
type = names(res1)
df_go = NULL
for(gset1 in type){
res_g1 = res1[[gset1]]
res_g1$type = gset1
df_go = rbind(df_go, res_g1)
}
df_list[[label]] = df_go
}df_size = NULL
figure_dir = file.path("figures", release_name)
goseq_figure_dir = file.path(figure_dir, "goseq_plots")
if (!file.exists(goseq_figure_dir)){
dir.create(goseq_figure_dir, recursive = TRUE)
}nchar_limit = 70
for(k in 1:length(df_list)){
d1 = df_list[[k]]
if(sum(d1$FDR < 0.05) == 0){ next }
d1 = d1[d1$FDR < 0.05,]
d1 = d1[order(d1$FDR, decreasing = TRUE),]
d1_label = sapply(d1$category, function(x) {
space_positions = gregexpr(" ", x)[[1]]
if (nchar(x) > nchar_limit) {
break_pos = max(space_positions[space_positions < nchar_limit])
return(paste0(paste0(rep(" ", nchar_limit-break_pos), collapse=""),
substr(x, 1, break_pos), "\n",
substr(x, break_pos+1, nchar(x))))
} else {
x = paste0(paste0(rep(" ", 1.1*(nchar_limit-nchar(x))), collapse=""), x)
return(x)
}
})
d1$y = 1:nrow(d1)
nm1 = names(df_list)[k]
nm1 = gsub("_", " ", nm1)
nm1 = gsub("PTC ", "", nm1)
nm1 = gsub("nor ", "Normoxia ", nm1)
nm1 = gsub("hyp ", "Hypoxia ", nm1)
nm1 = gsub("up", "up-regulated", nm1)
nm1 = gsub("down", "down-regulated", nm1)
if (grepl("Hypoxia vs Normoxia", nm1)){
nm1_substrings = strsplit(nm1, " Hypoxia vs Normoxia ")[[1]]
nm1 = paste0(nm1_substrings[1],
"\nHypoxia vs Normoxia\n",
nm1_substrings[2])
}else{
if (!grepl("EPAS1", nm1)){ # keep Normoxia for comparisons involving EPAS1 gene
nm1 = gsub("Normoxia ", "", nm1)
}
nm1 = sub("reverted ", "", nm1) # only replace the first occurrence of "reverted "
substrings = str_split(nm1, " ")[[1]]
nm1 = paste0(paste(substrings[1:(length(substrings)-1)], collapse = " "),
"\n", paste(substrings[length(substrings)], collapse = " "))
}
mycolors <- c("GO biological process" = "#F8766D", "reactome" = "#00BFC4")
filtered_colors <- mycolors[unique(d1$type)]
g1 = ggplot(d1, aes(x = -log10(FDR), y = y, color=type)) +
geom_point(size = 2.5) +
labs(x = "-log10(FDR)", y="", title = nm1) +
scale_y_continuous(breaks = 1:nrow(d1),
labels = d1_label,
limits = c(0.8, nrow(d1)+0.2)) +
theme(legend.position = "top") +
scale_color_manual(values = filtered_colors) +
guides(color = guide_legend(title = NULL))
h1 = 0.75*(nrow(d1)+0.4)/20 + 0.25
if (nrow(d1)<=2){
h1 = 0.75*(nrow(d1)+1)/20 + 0.25
}
canvas <- ggdraw() + draw_plot(g1, x = 0, y = 1-h1, width = 1, height = h1)
print(canvas)
if (!grepl("Hypoxia vs Normoxia", nm1)){
g2 = ggplot(d1, aes(x = -log10(FDR), y = y, color=type)) +
geom_point(size = 2.5) +
labs(x = "-log10(FDR)", y="") +
scale_y_continuous(breaks = 1:nrow(d1),
labels = d1_label,
limits = c(0.8, nrow(d1)+0.2)) +
theme(legend.position = "top") +
scale_color_manual(values = filtered_colors) +
guides(color = guide_legend(title = NULL)) +
theme(
panel.background = element_rect(fill = "transparent", color = NA),
plot.background = element_rect(fill = "transparent", color = NA),
legend.background = element_rect(fill = "transparent", color = NA)
)
h1 = 0.75*(nrow(d1)+0.4)/20 + 0.25
if (nrow(d1)<=2){
h1 = 0.75*(nrow(d1)+1)/20 + 0.25
}
canvas2 <- ggdraw() + draw_plot(g2, x = 0, y = 1-h1, width = 1, height = h1)
saved_figure_name = gsub("\n", "_", nm1)
saved_figure_name = paste0(gsub(" ", "_", saved_figure_name), ".svg")
ggsave(file=file.path(goseq_figure_dir, saved_figure_name),
plot=canvas2, width=8, height=5, bg = "transparent", units="in")
}
}gc()## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 1109111 59.3 2283981 122 NA 2283981 122.0
## Vcells 1947339 14.9 8388608 64 65536 3333335 25.5
sessionInfo()## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.3.1 cowplot_1.1.3 stringr_1.5.1 ggrepel_0.9.5
## [5] ggpubr_0.6.0 ggplot2_3.5.1 data.table_1.15.4
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 xfun_0.47 bslib_0.8.0 purrr_1.0.2
## [5] carData_3.0-5 colorspace_2.1-1 vctrs_0.6.5 generics_0.1.3
## [9] htmltools_0.5.8.1 yaml_2.3.10 utf8_1.2.4 rlang_1.1.4
## [13] jquerylib_0.1.4 pillar_1.9.0 glue_1.7.0 withr_3.0.1
## [17] lifecycle_1.0.4 munsell_0.5.1 ggsignif_0.6.4 gtable_0.3.5
## [21] ragg_1.2.7 evaluate_0.24.0 labeling_0.4.3 knitr_1.48
## [25] fastmap_1.2.0 fansi_1.0.6 highr_0.11 broom_1.0.6
## [29] Rcpp_1.0.13 scales_1.3.0 backports_1.5.0 cachem_1.1.0
## [33] jsonlite_1.8.8 abind_1.4-5 farver_2.1.2 systemfonts_1.1.0
## [37] textshaping_0.3.7 digest_0.6.37 stringi_1.8.4 rstatix_0.7.2
## [41] dplyr_1.1.4 grid_4.2.3 cli_3.6.3 tools_4.2.3
## [45] magrittr_2.0.3 sass_0.4.9 tibble_3.2.1 car_3.1-2
## [49] pkgconfig_2.0.3 rmarkdown_2.28 svglite_2.1.3 R6_2.5.1
## [53] compiler_4.2.3